package cn.chencaiju.day12;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.junit.Test;

/**
 * Minimal crawler demo: downloads one web page and prints every URL-like
 * substring found in its text to standard output.
 */
public class MyCreWler {
	/** Regular expression source used to recognise http/https/ftp/ftps URLs. */
	static final String URL_PATTERN="((ht|f)tps?):\\/\\/[\\w\\-]+(\\.[\\w\\-]+)+([\\w\\-"
			+ "\\.,@?^=%&:\\/~\\+#]*[\\w\\-\\@?^=%&\\/~\\+#])?";

	/**
	 * {@link #URL_PATTERN} compiled once; {@link #Line(String)} is invoked for
	 * every line of a page, so recompiling per call would be wasted work.
	 */
	private static final Pattern COMPILED_URL_PATTERN=Pattern.compile(URL_PATTERN);

	/**
	 * Feeds a single URL string to {@link #Line(String)}, which prints whatever
	 * URLs it finds in that string.
	 */
	@Test public void test() {
		Line("http://www.jikedaohang.com/");
	}
	
	/**
	 * Downloads the content of a fixed URL and prints the URLs it contains.
	 * The response is decoded as UTF-8 and scanned one line at a time.
	 *
	 * @param args command-line arguments (not used)
	 * @throws IOException if the page cannot be opened or read
	 */
	public static void main(String[] args) throws IOException {
		// Page to download
		URL url=new URL("http://www.jikedaohang.com/");
		// try-with-resources closes the reader and the underlying stream even
		// when reading fails part-way; the raw stream is declared separately so
		// it is still closed if constructing the reader throws.
		try (InputStream webStream=url.openStream();
				BufferedReader reader=new BufferedReader(
						new InputStreamReader(webStream, "utf-8"))) {
			// Read the page line by line
			String line;
			while((line=reader.readLine())!=null) {
				Line(line);
			}
		}
	}
	
	/**
	 * Prints every substring of {@code line} that matches {@link #URL_PATTERN},
	 * one match per output line, in the order the matches occur.
	 *
	 * @param line text to scan; {@code null} is treated as containing no URLs
	 */
	public static void Line(String  line) {
		if (line == null) {
			// Nothing to scan; avoids a NullPointerException from matcher(null)
			return;
		}
		Matcher matcher = COMPILED_URL_PATTERN.matcher(line);
		while (matcher.find()) {	// advance to the next match
			System.out.println(matcher.group());	// print the matched text
		}
	}
}
